In [3]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np 
from matplotlib import pyplot as plt 
import seaborn as sbn 

import statsmodels.api as sm
from sklearn.preprocessing import OneHotEncoder

%aimport HER2_classifier
In [9]:
class myspace(object): 
    def __init__(self): 
        pass

args                    =       myspace()
args.data               =       ['./HER2_SKBR3_data_6-7-21/']
args.out                =       ['./output/'] 
args.drug               =       ['Neratinib']   # ['Trastuzumab']
args.sensitive_line     =       ['WT']
args.resistant_line     =       ['T798I']
args.load               =       ['normalized']  # ['raw']
args.nclus              =       [15]
args.resample_sz        =       [125]
args.burnin             =       [0]


Load Data



In [36]:
data, clover_sel, mscarl_sel = HER2_classifier.load_data(args)
print('len selector:', len(clover_sel))
print(clover_sel[0:5])
print(mscarl_sel[0:5])
data.head()
loading data...
# of datasets to load: 13
mapping drug names to one name...
len selector: 135
['0_x', '15_x', '30_x', '45_x', '60_x']
['0_y', '15_y', '30_y', '45_y', '60_y']
Out[36]:
track_index 0_x 15_x 30_x 45_x 60_x 75_x 90_x 105_x 120_x ... 2805_y 2820_y 2835_y 2850_y 2865_y 2880_y 2895_y drug cell_line mutant
0 3_1000011860_skbr3_erk_akt__WT_untreated 0.836900 0.838897 0.557456 0.852664 1.137005 0.798656 0.696818 0.596282 0.495039 ... NaN NaN NaN NaN NaN NaN NaN untreated SKBR3 WT
1 79_1000288819_skbr3_erk_akt__WT_untreated 0.615120 0.775863 0.840381 0.828898 0.797149 0.827444 0.779779 0.848228 0.868804 ... NaN NaN NaN NaN NaN NaN NaN untreated SKBR3 WT
2 78_1000285319_skbr3_erk_akt__WT_untreated 0.790633 0.865733 0.793790 0.890870 0.870234 0.917848 0.700682 1.004571 0.972406 ... NaN NaN NaN NaN NaN NaN NaN untreated SKBR3 WT
3 78_1000282488_skbr3_erk_akt__WT_untreated 0.919838 0.836807 0.853458 0.712794 0.662686 0.630794 0.635830 0.723558 0.749466 ... NaN NaN NaN NaN NaN NaN NaN untreated SKBR3 WT
4 1_1000002701_skbr3_erk_akt__WT_untreated 0.822612 0.834595 0.920568 0.968030 0.896872 0.906976 0.933929 0.925297 0.913278 ... NaN NaN NaN NaN NaN NaN NaN untreated SKBR3 WT

5 rows × 398 columns

In [37]:
data.drug.unique()
Out[37]:
array(['untreated', '10nm_neratinib', '10ug_ml_trastuzumab'], dtype=object)
In [38]:
data.cell_line.unique()
Out[38]:
array(['SKBR3'], dtype=object)
In [39]:
data.mutant.unique()
Out[39]:
array(['WT', 'T798I', 'ND611', 'ND648', 'S310Y', 'T306M', 'L313V',
       'T733I', 'L755T759DEL', 'I767M', 'A355QFS76', 'D639E', 'Y835F',
       'V842I', 'R678Q', 'V424I', 'D769Y', 'E717K', 'E744D', 'G621AFS31',
       'I435L', 'I886M', 'L43V', 'P416T', 'P702L', 'R203P', 'R340G',
       'R456C', 'R47C', 'S609C', 'T479M', 'W906C', 'Y64F', 'D277H',
       'D638H', 'D873G', 'G1055C', 'G778S779INSLPS', 'H470Q', 'H878Y',
       'L662V', 'M833I', 'L785F', 'L869R', 'P122L', 'S305C', 'W482GFS74',
       'E939G', 'G727A', 'M889I', 'N488S', 'Q1206K', 'W825', 'A1039T',
       'E975Q', 'S310F', 'R34W', 'R929WL', 'S963', 'V797A',
       'Y722V773INSRDGE', 'L1197F', 'R1153L', 'Y1127A1129DEL', 'R897G',
       'P523S', 'D769N', 'E770A771INSGIRD', 'N427K', 'P232S', 'R190Q',
       'A1160V', 'T297I', 'T862S', 'V697L', 'P579L', 'R288Q', 'A37T',
       'D933Y', 'E405D', 'F976L', 'L755S'], dtype=object)


Filter NA



In [40]:
data, clover_sel, mscarl_sel = HER2_classifier.filter_na(data, args, clover_sel, mscarl_sel)
print('len selector:', len(clover_sel))
print(clover_sel[0:5])
print(mscarl_sel[0:5])
data.head()
filtering to drug and removing NAs...
Data shape (untreated + drug): (23672, 398)
length of time series BEFORE removing time points with NA 135
length of time series AFTER removing time points with NA 135
len selector: 135
['0_x' '15_x' '30_x' '45_x' '60_x']
['0_y' '15_y' '30_y' '45_y' '60_y']
Out[40]:
track_index 0_x 15_x 30_x 45_x 60_x 75_x 90_x 105_x 120_x ... 2805_y 2820_y 2835_y 2850_y 2865_y 2880_y 2895_y drug cell_line mutant
0 3_1000011860_skbr3_erk_akt__WT_untreated 0.836900 0.838897 0.557456 0.852664 1.137005 0.798656 0.696818 0.596282 0.495039 ... NaN NaN NaN NaN NaN NaN NaN untreated SKBR3 WT
1 79_1000288819_skbr3_erk_akt__WT_untreated 0.615120 0.775863 0.840381 0.828898 0.797149 0.827444 0.779779 0.848228 0.868804 ... NaN NaN NaN NaN NaN NaN NaN untreated SKBR3 WT
2 78_1000285319_skbr3_erk_akt__WT_untreated 0.790633 0.865733 0.793790 0.890870 0.870234 0.917848 0.700682 1.004571 0.972406 ... NaN NaN NaN NaN NaN NaN NaN untreated SKBR3 WT
3 78_1000282488_skbr3_erk_akt__WT_untreated 0.919838 0.836807 0.853458 0.712794 0.662686 0.630794 0.635830 0.723558 0.749466 ... NaN NaN NaN NaN NaN NaN NaN untreated SKBR3 WT
4 1_1000002701_skbr3_erk_akt__WT_untreated 0.822612 0.834595 0.920568 0.968030 0.896872 0.906976 0.933929 0.925297 0.913278 ... NaN NaN NaN NaN NaN NaN NaN untreated SKBR3 WT

5 rows × 398 columns

In [44]:
data.groupby(['mutant', 'drug']).count()['track_index'].sort_values().head(15)
Out[44]:
mutant           drug          
E770A771INSGIRD  10nm_neratinib    17
W825             untreated         19
W482GFS74        untreated         19
T862S            untreated         20
P702L            untreated         21
M889I            untreated         21
                 10nm_neratinib    21
S963             untreated         22
P122L            untreated         23
E770A771INSGIRD  untreated         24
W825             10nm_neratinib    25
S305C            untreated         26
T862S            10nm_neratinib    29
R897G            untreated         34
W482GFS74        10nm_neratinib    37
Name: track_index, dtype: int64
In [45]:
data.groupby(['mutant', 'drug']).count()['track_index'].sort_values().tail(15)
Out[45]:
mutant          drug          
S310Y           10nm_neratinib     195
A37T            10nm_neratinib     210
P579L           10nm_neratinib     219
L662V           10nm_neratinib     225
H470Q           10nm_neratinib     239
T479M           10nm_neratinib     256
R678Q           10nm_neratinib     274
G778S779INSLPS  10nm_neratinib     284
D277H           10nm_neratinib     309
ND611           untreated          810
                10nm_neratinib    1159
T798I           untreated         1194
                10nm_neratinib    1297
WT              untreated         1369
                10nm_neratinib    1800
Name: track_index, dtype: int64
In [63]:
plt.hist(data[lambda x: ~x.mutant.isin(['WT', 'T798I', 'ND611'])].groupby(['mutant', 'drug']).count()['track_index'])
Out[63]:
(array([26., 33., 32., 27., 20., 11.,  3.,  2.,  2.,  2.]),
 array([ 17. ,  46.2,  75.4, 104.6, 133.8, 163. , 192.2, 221.4, 250.6,
        279.8, 309. ]),
 <BarContainer object of 10 artists>)
2021-06-07T11:09:48.859479 image/svg+xml Matplotlib v3.3.4, https://matplotlib.org/


Add Burn-in

Remove the first few time points



In [46]:
clover_sel, mscarl_sel = HER2_classifier.burnin(args, clover_sel, mscarl_sel)
print('len selector:', len(clover_sel))
print('len selector:', len(mscarl_sel))
print(clover_sel[0:5])
print(mscarl_sel[0:5])
adding burnin of 0
len selector: 135
len selector: 135
['0_x' '15_x' '30_x' '45_x' '60_x']
['0_y' '15_y' '30_y' '45_y' '60_y']


Resample time-series



In [47]:
X_train = HER2_classifier.resample(data, args, clover_sel, mscarl_sel)
resampling time series...
Training data shape BEFORE resampling: (23672, 135, 2)
Training data shape AFTER resampling: (23672, 125, 2)


Fit the time-series K-means clustering



In [48]:
y_pred, km = HER2_classifier.fit_timeseries_kmeans(args, X_train, plot=True, save=None)
performing time-series kmeans clustering...

5.459 --> 3.927 --> 3.838 --> 3.803 --> 3.780 --> 3.762 --> 3.750 --> 3.743 --> 3.738 --> 3.734 --> 3.731 --> 3.729 --> 3.727 --> 3.725 --> 3.723 --> 3.721 --> 3.720 --> 3.718 --> 3.717 --> 3.716 --> 3.715 --> 3.714 --> 3.713 --> 3.713 --> 3.712 --> 3.712 --> 3.711 --> 3.711 --> 3.710 --> 3.710 --> 3.709 --> 3.709 --> 3.709 --> 3.708 --> 3.708 --> 3.707 --> 3.707 --> 3.706 --> 3.706 --> 3.706 --> 3.706 --> 3.706 --> 3.706 --> 3.705 --> 3.705 --> 3.705 --> 3.705 --> 3.705 --> 3.705 --> 3.705 --> 

2021-06-07T11:02:32.794908 image/svg+xml Matplotlib v3.3.4, https://matplotlib.org/


Calculate the cluster proportion

...within each experiment.



In [49]:
cm, lb = HER2_classifier.quantify_cluster_prop(args, data, y_pred)
cm.shape
quantifying experiment by cluster proportions...
Out[49]:
(252, 15)


Visualize the cluster co-occurence within experiment



In [50]:
HER2_classifier.plot_cluster_corr(cm, save=None)
<Figure size 504x504 with 0 Axes>
2021-06-07T11:02:55.114417 image/svg+xml Matplotlib v3.3.4, https://matplotlib.org/


Dimensionality Reduction



In [79]:
res, pca = HER2_classifier.reduce_dim(args, cm, lb, plot=True, save=None)
performing dim. reduction (pca)...
PCA explained variance ratio: [0.49678721 0.11723573]
PC shape: (252, 2)
2021-06-07T11:28:05.854568 image/svg+xml Matplotlib v3.3.4, https://matplotlib.org/
2021-06-07T11:28:10.737741 image/svg+xml Matplotlib v3.3.4, https://matplotlib.org/
In [70]:
pc_loadings = pd.DataFrame({'clus_feat': range(pca.components_.shape[1]), 'PC1':pca.components_[0], 'PC2':pca.components_[1]})
pc_loadings.head()
Out[70]:
clus_feat PC1 PC2
0 0 -0.264865 -0.152827
1 1 -0.064274 0.208965
2 2 -0.274968 -0.140808
3 3 0.320203 0.032544
4 4 -0.141994 -0.269706
In [77]:
plt.figure(figsize=(10,7))
sbn.barplot(x='clus_feat', y='PC1', data=pc_loadings, order=pc_loadings.sort_values(by='PC1').clus_feat)
plt.show()
2021-06-07T11:16:10.407842 image/svg+xml Matplotlib v3.3.4, https://matplotlib.org/
In [76]:
plt.figure(figsize=(10,7))
sbn.barplot(x='clus_feat', y='PC2', data=pc_loadings, order=pc_loadings.sort_values(by='PC2').clus_feat)
plt.show()
2021-06-07T11:16:03.345641 image/svg+xml Matplotlib v3.3.4, https://matplotlib.org/
In [52]:
res
Out[52]:
pc1 pc2 treatment cell_line exp_set
0 0.161031 0.048727 neratinib A1039T H210326_Set2
1 -0.223676 -0.067015 untreated A1039T H210326_Set2
2 0.246843 -0.030411 neratinib A1160V H210409_Set1
3 -0.236713 -0.063892 untreated A1160V H210409_Set1
4 0.163846 0.052475 neratinib A355QFS76 H210205_Set2_2
... ... ... ... ... ...
247 -0.170035 0.003534 untreated Y64F H210219_Set1
248 0.102057 0.055529 neratinib Y722V773INSRDGE H210401_Set1
249 -0.103252 0.083981 untreated Y722V773INSRDGE H210401_Set1
250 0.147673 0.099242 neratinib Y835F H210205_Set2_2
251 -0.170091 0.112895 untreated Y835F H210205_Set2_2

252 rows × 5 columns



Check for Batch Effects



In [56]:
batch_res = HER2_classifier.check_batch_effects(args, res, plot=True, save=None)
batch_res.head()
['H210326_Set2' 'H210409_Set1' 'H210205_Set2_2' 'H210409_Set2'
 'H210423_Set1' 'H210219_Set1' 'H210401_Set2' 'H210212_Set1'
 'H210219_Set2' 'H210326_Set1' 'H210205_Set1_2' 'H210401_Set1'
 'H210212_Set2']
['H210205_Set1_2', 'H210205_Set2_2', 'H210212_Set1', 'H210212_Set2', 'H210219_Set1', 'H210219_Set2', 'H210326_Set1', 'H210326_Set2', 'H210401_Set1', 'H210401_Set2', 'H210409_Set1', 'H210409_Set2', 'H210423_Set1', 'neratinib', 'untreated', 'T798I', 'WT']
feature order:
	 x1  ->  H210205_Set1_2
	 x2  ->  H210205_Set2_2
	 x3  ->  H210212_Set1
	 x4  ->  H210212_Set2
	 x5  ->  H210219_Set1
	 x6  ->  H210219_Set2
	 x7  ->  H210326_Set1
	 x8  ->  H210326_Set2
	 x9  ->  H210401_Set1
	 x10  ->  H210401_Set2
	 x11  ->  H210409_Set1
	 x12  ->  H210409_Set2
	 x13  ->  H210423_Set1
	 x14  ->  neratinib
	 x15  ->  untreated
	 x16  ->  T798I
	 x17  ->  WT
---------------------------------------------------------------------------
PC1 ANOVA
---------------------------------------------------------------------------
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                      y   R-squared:                       0.885
Model:                            OLS   Adj. R-squared:                  0.842
Method:                 Least Squares   F-statistic:                     20.43
Date:                Mon, 07 Jun 2021   Prob (F-statistic):           3.03e-13
Time:                        11:03:04   Log-Likelihood:                 73.208
No. Observations:                  52   AIC:                            -116.4
Df Residuals:                      37   BIC:                            -87.15
Df Model:                          14                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0119      0.005     -2.538      0.015      -0.021      -0.002
x1             0.0320      0.034      0.948      0.349      -0.036       0.100
x2            -0.0432      0.034     -1.280      0.208      -0.111       0.025
x3             0.0031      0.034      0.092      0.928      -0.065       0.071
x4            -0.0023      0.034     -0.068      0.946      -0.071       0.066
x5             0.0417      0.034      1.236      0.224      -0.027       0.110
x6            -0.0022      0.034     -0.065      0.949      -0.070       0.066
x7             0.0128      0.034      0.380      0.706      -0.056       0.081
x8            -0.0278      0.034     -0.825      0.415      -0.096       0.041
x9            -0.0042      0.034     -0.124      0.902      -0.072       0.064
x10           -0.0248      0.034     -0.735      0.467      -0.093       0.044
x11            0.0037      0.034      0.109      0.914      -0.065       0.072
x12           -0.0211      0.034     -0.626      0.535      -0.089       0.047
x13            0.0204      0.034      0.606      0.548      -0.048       0.089
x14            0.1520      0.010     15.179      0.000       0.132       0.172
x15           -0.1638      0.010    -16.367      0.000      -0.184      -0.144
x16           -0.0462      0.010     -4.610      0.000      -0.066      -0.026
x17            0.0343      0.010      3.422      0.002       0.014       0.055
==============================================================================
Omnibus:                       25.902   Durbin-Watson:                   0.697
Prob(Omnibus):                  0.000   Jarque-Bera (JB):                4.150
Skew:                          -0.070   Prob(JB):                        0.126
Kurtosis:                       1.623   Cond. No.                     4.76e+16
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The smallest eigenvalue is 4.76e-32. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.
---------------------------------------------------------------------------
PC2 ANOVA
---------------------------------------------------------------------------
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                      y   R-squared:                       0.714
Model:                            OLS   Adj. R-squared:                  0.606
Method:                 Least Squares   F-statistic:                     6.608
Date:                Mon, 07 Jun 2021   Prob (F-statistic):           2.00e-06
Time:                        11:03:04   Log-Likelihood:                 86.067
No. Observations:                  52   AIC:                            -142.1
Df Residuals:                      37   BIC:                            -112.9
Df Model:                          14                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0323      0.004      8.823      0.000       0.025       0.040
x1             0.0370      0.026      1.404      0.169      -0.016       0.090
x2            -0.0035      0.026     -0.131      0.896      -0.057       0.050
x3            -0.0093      0.026     -0.353      0.726      -0.063       0.044
x4            -0.0078      0.026     -0.295      0.770      -0.061       0.046
x5             0.0182      0.026      0.692      0.494      -0.035       0.072
x6            -0.0144      0.026     -0.547      0.588      -0.068       0.039
x7             0.0185      0.026      0.702      0.487      -0.035       0.072
x8            -0.0129      0.026     -0.489      0.628      -0.066       0.040
x9            -0.0201      0.026     -0.762      0.451      -0.073       0.033
x10            0.0284      0.026      1.078      0.288      -0.025       0.082
x11            0.0203      0.026      0.772      0.445      -0.033       0.074
x12           -0.0037      0.026     -0.139      0.890      -0.057       0.050
x13           -0.0186      0.026     -0.705      0.485      -0.072       0.035
x14            0.0155      0.008      1.983      0.055      -0.000       0.031
x15            0.0168      0.008      2.147      0.038       0.001       0.033
x16            0.0869      0.008     11.110      0.000       0.071       0.103
x17           -0.0546      0.008     -6.980      0.000      -0.070      -0.039
==============================================================================
Omnibus:                        1.596   Durbin-Watson:                   1.874
Prob(Omnibus):                  0.450   Jarque-Bera (JB):                1.320
Skew:                          -0.387   Prob(JB):                        0.517
Kurtosis:                       2.907   Cond. No.                     4.76e+16
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The smallest eigenvalue is 4.76e-32. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.
2021-06-07T11:03:04.627919 image/svg+xml Matplotlib v3.3.4, https://matplotlib.org/
2021-06-07T11:03:07.091711 image/svg+xml Matplotlib v3.3.4, https://matplotlib.org/
Out[56]:
constant H210205_Set1_2 H210205_Set2_2 H210212_Set1 H210212_Set2 H210219_Set1 H210219_Set2 H210326_Set1 H210326_Set2 H210401_Set1 H210401_Set2 H210409_Set1 H210409_Set2 H210423_Set1 neratinib untreated T798I WT PC
0 1.548786e-02 0.349278 0.208416 0.927536 0.945985 0.224213 0.948919 0.706340 0.414765 0.902293 0.467051 0.913812 0.534893 0.548480 1.798289e-17 1.573839e-18 4.674795e-05 1.532178e-03 1
1 1.242664e-10 0.168790 0.896442 0.726417 0.769692 0.493509 0.587571 0.486851 0.627625 0.451049 0.288229 0.445069 0.889915 0.485351 5.483875e-02 3.841371e-02 2.411433e-13 3.012377e-08 2


Train classifier on [positive/negative] controls



In [57]:
model, accuracy = HER2_classifier.train_classifier(args, res, plot=True, save=None)
training classifier...
sensitive line: 	 WT
resistant line: 	 T798I
drug:		 neratinib
drug + WT df size:  (26, 5)
X train shape: (26, 2)
# neg class (resistant): 13
# pos class (sensitive): 13
2021-06-07T11:03:09.213165 image/svg+xml Matplotlib v3.3.4, https://matplotlib.org/


Assign mutant sensitivity/resistance calls



In [85]:
prob_res = HER2_classifier.predict_mutants(args, model, res, batch_res)
prob_res
predicting unlabeled sensitivities...
Out[85]:
pc1 pc2 treatment cell_line exp_set prob_res prob_sens call odds_ratio batch PC1_batch_pval PC1_batch_flag PC2_batch_pval PC2_batch_flag
64 0.232905 -0.027193 neratinib P579L H210423_Set1 0.044584 0.955416 sens 0.046664 H210423_Set1 0.548480 False 0.485351 False
85 0.217037 -0.027397 neratinib T306M H210205_Set1_2 0.044895 0.955105 sens 0.047005 H210205_Set1_2 0.349278 False 0.168790 False
40 0.237333 -0.021065 neratinib M833I H210219_Set2 0.045076 0.954924 sens 0.047204 H210219_Set2 0.948919 False 0.587571 False
13 0.216064 -0.032084 neratinib E405D H210409_Set2 0.045154 0.954846 sens 0.047290 H210409_Set2 0.534893 False 0.889915 False
72 0.242047 -0.008056 neratinib R340G H210212_Set2 0.046946 0.953054 sens 0.049259 H210212_Set2 0.945985 False 0.769692 False
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
76 0.099087 0.061844 neratinib R678Q H210205_Set2_2 0.634225 0.365775 res 1.733922 H210205_Set2_2 0.208416 False 0.896442 False
91 0.096304 0.072438 neratinib V797A H210401_Set1 0.707492 0.292508 res 2.418710 H210401_Set1 0.902293 False 0.451049 False
92 0.061079 0.042302 neratinib V842I H210205_Set2_2 0.713117 0.286883 res 2.485736 H210205_Set2_2 0.208416 False 0.896442 False
25 0.091012 0.072935 neratinib G778S779INSLPS H210219_Set2 0.735133 0.264867 res 2.775485 H210219_Set2 0.948919 False 0.587571 False
38 0.082803 0.141710 neratinib L785F H210326_Set1 0.920393 0.079607 res 11.561642 H210326_Set1 0.706340 False 0.486851 False

100 rows × 14 columns

In [59]:
prob_res.tail(10)
Out[59]:
pc1 pc2 treatment cell_line exp_set prob_res prob_sens call odds_ratio batch PC1_batch_pval PC1_batch_flag PC2_batch_pval PC2_batch_flag
35 0.147088 0.076456 neratinib L755S H210409_Set2 0.397996 0.602004 sens 0.661119 H210409_Set2 0.534893 False 0.889915 False
37 -0.189156 -0.057121 neratinib L755T759DEL H210205_Set1_2 0.466859 0.533141 sens 0.875677 H210205_Set1_2 0.349278 False 0.168790 False
99 0.147673 0.099242 neratinib Y835F H210205_Set2_2 0.534418 0.465582 res 1.147850 H210205_Set2_2 0.208416 False 0.896442 False
20 0.126045 0.074688 neratinib F976L H210409_Set2 0.535775 0.464225 res 1.154130 H210409_Set2 0.534893 False 0.889915 False
98 0.102057 0.055529 neratinib Y722V773INSRDGE H210401_Set1 0.581248 0.418752 res 1.388048 H210401_Set1 0.902293 False 0.451049 False
76 0.099087 0.061844 neratinib R678Q H210205_Set2_2 0.634225 0.365775 res 1.733922 H210205_Set2_2 0.208416 False 0.896442 False
91 0.096304 0.072438 neratinib V797A H210401_Set1 0.707492 0.292508 res 2.418710 H210401_Set1 0.902293 False 0.451049 False
92 0.061079 0.042302 neratinib V842I H210205_Set2_2 0.713117 0.286883 res 2.485736 H210205_Set2_2 0.208416 False 0.896442 False
25 0.091012 0.072935 neratinib G778S779INSLPS H210219_Set2 0.735133 0.264867 res 2.775485 H210219_Set2 0.948919 False 0.587571 False
38 0.082803 0.141710 neratinib L785F H210326_Set1 0.920393 0.079607 res 11.561642 H210326_Set1 0.706340 False 0.486851 False
In [89]:
plt.figure(figsize=(10,10))
sbn.scatterplot(x='pc1', y='pc2', data=prob_res, hue='prob_res', style='call', s=300)
plt.show()
2021-06-07T11:39:24.862603 image/svg+xml Matplotlib v3.3.4, https://matplotlib.org/
In [81]:
0.6/0.4
Out[81]:
1.4999999999999998
In [83]:
np.log2(0.4/0.6)
Out[83]:
-0.5849625007211561
In [ ]: